{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.7.4"
    },
    "colab": {
      "name": "mvor_demo.ipynb",
      "provenance": []
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Xm739DCTDRlw",
        "colab_type": "text"
      },
      "source": [
        "Copyright (c) University of Strasbourg. All Rights Reserved.\n",
        "# MVOR\n",
        "**A Multi-view RGB-D Operating Room Dataset for 2D and 3D Human Pose Estimation (MICCAI-LABELS-2018)**\n",
        "\n",
        "_Vinkle Srivastav, Thibaut Issenhuth, Abdolrahim Kadkhodamohammadi, Michel de Mathelin, Afshin Gangi, Nicolas Padoy_\n",
        "\n",
        "[![MVOR Dataset](https://img.shields.io/badge/MVOR-Dataset-blue)](http://camma.u-strasbg.fr/datasets/) [![arXiv](https://img.shields.io/badge/arxiv-1808.08180-red)](https://arxiv.org/abs/1808.08180) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/CAMMA-public/MVOR/blob/master/mvor_demo.ipynb)\n",
        "\n",
        "\n",
        "This demo notebook shows the 2D/3D ground truth visualization on the MVOR dataset and the evaluation results.\n",
        "\n",
        "**The code below is needed only for the Colab demo. Please make sure to enable \"GPU\" using EDIT->Notebook settings.**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "pWnhxxa0DRlx",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Download the code, and the MVOR dataset\n",
        "# Clones the repository and makes it the working directory, then fetches the\n",
        "# dataset archive, extracts it quietly and deletes the zip file.\n",
        "# NOTE(review): re-running this cell after the `%cd` clones and downloads everything\n",
        "# again inside MVOR/ - run it once per session.\n",
        "!git clone https://github.com/CAMMA-public/MVOR.git\n",
        "%cd MVOR\n",
        "\n",
        "!wget https://s3.unistra.fr/camma_public/datasets/mvor/camma_mvor_dataset.zip\n",
        "!unzip -q camma_mvor_dataset.zip && rm camma_mvor_dataset.zip"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "zFLJE2OWDRl0",
        "colab_type": "text"
      },
      "source": [
        "## Imports"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "EwPOAVm-DRl1",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "import json\n",
        "import random\n",
        "import matplotlib.pyplot as plt\n",
        "import cv2\n",
        "import os\n",
        "import torch\n",
        "import torchvision\n",
        "import numpy as np\n",
        "from IPython.display import display\n",
        "from torchvision.transforms import functional as F\n",
        "from PIL import Image\n",
        "from mpl_toolkits.mplot3d.axes3d import Axes3D\n",
        "from lib.visualize_groundtruth import create_index, viz2d, plt_imshow, bgr2rgb, plt_3dplot, coco_to_camma_kps, progress_bar\n",
        "from matplotlib.axes._axes import _log as matplotlib_axes_logger\n",
        "\n",
        "# only let ERROR-level (and above) messages from matplotlib's axes logger through\n",
        "matplotlib_axes_logger.setLevel('ERROR')\n",
        "\n",
        "# set to False to force CPU execution even when a CUDA device is available\n",
        "USE_GPU = True\n",
        "\n",
        "# use the GPU only when it is both requested and available, otherwise fall back to the CPU\n",
        "DEVICE = torch.device(\"cuda\" if USE_GPU and torch.cuda.is_available() else \"cpu\")\n",
        "print(\"Using device:\", DEVICE)\n",
        "\n",
        "# Define the paths (relative, so they are resolved against the current working directory)\n",
        "DATASET_ROOT = \"camma_mvor_dataset\"\n",
        "GT_ANNO_PATH = \"annotations/camma_mvor_2018.json\"\n",
        "\n",
        "# load the ground truth annotations; the context manager closes the file handle as soon\n",
        "# as the JSON is parsed instead of leaving it open until garbage collection\n",
        "with open(GT_ANNO_PATH) as gt_file:\n",
        "    camma_mvor_gt = json.load(gt_file)\n",
        "\n",
        "# lookup tables used by the cells below: 2D annotations (keyed by the image id as a string),\n",
        "# 3D annotations and multi-view entries (keyed by the multi-view id), and image id -> image path\n",
        "anno_2d, anno3d, mv_paths, imid_to_path = create_index(camma_mvor_gt)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "ptUEIsnPDRl3",
        "colab_type": "text"
      },
      "source": [
        "## MVOR Stats"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "DiTZlMiQDRl3",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# run the repository's stats script on the ground-truth annotation file\n",
        "!python lib/show_stats.py --gt annotations/camma_mvor_2018.json"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "IJXCcm0ODRl6",
        "colab_type": "text"
      },
      "source": [
        "## Ground truth visualization"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "s7g9E9zgDRl6",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Pick a random multi-view frame; its id is the \"_\"-joined ids of the per-camera images.\n",
        "# For a fixed example use e.g.: imid_3d = \"10010000013_10020000013_10030000013\"\n",
        "imid_3d = random.choice(list(mv_paths.keys()))\n",
        "imids_2d = list(map(int, imid_3d.split(\"_\")))\n",
        "\n",
        "# read every camera view with OpenCV\n",
        "imgs = [cv2.imread(os.path.join(DATASET_ROOT, imid_to_path[view_id])) for view_id in imids_2d]\n",
        "\n",
        "# 2D annotations are looked up per view (by the image id as a string),\n",
        "# 3D annotations once for the whole multi-view frame\n",
        "anns2d = [anno_2d[str(view_id)] for view_id in imids_2d]\n",
        "anns3d = anno3d[imid_3d]\n",
        "\n",
        "# draw the 2D ground truth on each view\n",
        "imgs_render_gt = [viz2d(view, view_anns) for view, view_anns in zip(imgs, anns2d)]\n",
        "\n",
        "# 2x2 grid: the three camera views occupy slots 1-3; plt_3dplot then receives the figure\n",
        "# (presumably to add the 3D plot in the remaining slot - confirm in lib/visualize_groundtruth)\n",
        "fig = plt.figure(figsize=(12, 10))\n",
        "print(\"Visualizing the ground truth\")\n",
        "for cam in range(3):\n",
        "    ax = fig.add_subplot(2, 2, cam + 1)\n",
        "    ax.imshow(bgr2rgb(imgs_render_gt[cam]))\n",
        "    ax.set_title(\"CAM-{}\".format(cam + 1))\n",
        "    ax.axis(\"off\")\n",
        "plt_3dplot(fig, anns3d, camma_mvor_gt)\n",
        "plt.show()"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "JS49YDVZDRl9",
        "colab_type": "text"
      },
      "source": [
        "**Run the above cell again to check the result on a different multi-view image.**"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "PeCLzZvPDRl9",
        "colab_type": "text"
      },
      "source": [
        "## Visualizing the 2D predictions from the Keypoint-MaskRCNN model\n",
        "\n",
        "Here we run a Keypoint-MaskRCNN model from [pytorch's torchvision library](https://pytorch.org/docs/stable/torchvision/models.html#object-detection-instance-segmentation-and-person-keypoint-detection) on the same multi-view image"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "-jTqMlEDDRl9",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# build the Keypoint R-CNN with pre-trained weights, switch it to eval mode\n",
        "# and move it to the selected device (\"cpu\" or \"cuda\")\n",
        "model = torchvision.models.detection.keypointrcnn_resnet50_fpn(pretrained=True)\n",
        "model = model.eval().to(DEVICE)\n",
        "\n",
        "# read each view of the multi-view image with PIL as RGB and turn it into a tensor on DEVICE\n",
        "imgs_tensor = [\n",
        "    F.to_tensor(Image.open(os.path.join(DATASET_ROOT, imid_to_path[view_id])).convert('RGB')).to(DEVICE)\n",
        "    for view_id in imids_2d\n",
        "]\n",
        "\n",
        "# one forward pass over all views, without tracking gradients\n",
        "with torch.no_grad():\n",
        "    predictions_2d = model(imgs_tensor)\n",
        "\n",
        "# bring the detections back to the host as numpy arrays, one entry per view\n",
        "boxes = [view_pred[\"boxes\"].cpu().numpy() for view_pred in predictions_2d]\n",
        "keypoints = [view_pred[\"keypoints\"].cpu().numpy() for view_pred in predictions_2d]\n",
        "\n",
        "# build the per-view annotation lists handed to viz2d: every box is converted from\n",
        "# corners (x1, y1, x2, y2) to (x, y, width, height) and every keypoint array is\n",
        "# passed through coco_to_camma_kps\n",
        "all_anns = [\n",
        "    [\n",
        "        {\n",
        "            \"bbox\": [x1, y1, x2 - x1, y2 - y1],\n",
        "            \"keypoints\": coco_to_camma_kps(person_kps),\n",
        "            \"person_id\": person_idx,\n",
        "            \"only_bbox\": 0,\n",
        "        }\n",
        "        for person_idx, ((x1, y1, x2, y2), person_kps) in enumerate(zip(view_boxes, view_kps))\n",
        "    ]\n",
        "    for view_boxes, view_kps in zip(boxes, keypoints)\n",
        "]\n",
        "\n",
        "# render the results: one row with the three camera views\n",
        "imgs_render_pred = [viz2d(view, view_anns) for view, view_anns in zip(imgs, all_anns)]\n",
        "fig = plt.figure(figsize=(20, 18))\n",
        "print(\"Visualizing the 2D predictions from the Keypoint-MaskRCNN model\")\n",
        "for cam in range(3):\n",
        "    ax = fig.add_subplot(1, 3, cam + 1)\n",
        "    ax.imshow(bgr2rgb(imgs_render_pred[cam]))\n",
        "    ax.set_title(\"CAM-{}\".format(cam + 1))\n",
        "    ax.axis(\"off\")\n",
        "plt.show()"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Ne6pAJyTDRmA",
        "colab_type": "text"
      },
      "source": [
        "## Evaluation of the Keypoint-MaskRCNN model on the MVOR dataset\n",
        "Here, we evaluate the model for 2D person bounding box and 2D person keypoints on the MVOR dataset"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "epKWaUw3DRmA",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Helper function for the evaluation\n",
        "@torch.no_grad()\n",
        "def evaluate():\n",
        "    \"\"\"Run the Keypoint-MaskRCNN model on every MVOR image and collect the detections.\n",
        "\n",
        "    Reads the notebook globals ``model``, ``DEVICE``, ``DATASET_ROOT`` and\n",
        "    ``imid_to_path``; gradient tracking is disabled by the decorator.\n",
        "\n",
        "    Returns:\n",
        "        list of dict: one entry per detection with the keys ``image_id``,\n",
        "        ``category_id``, ``bbox`` as [x, y, width, height], ``keypoints``\n",
        "        (flattened output of ``coco_to_camma_kps``), ``coco_keypoints``\n",
        "        (flattened keypoints as returned by the model) and ``score``.\n",
        "    \"\"\"\n",
        "    print(\"Evaluating Keypoint-MaskRCNN on the MVOR dataset\")\n",
        "    # the number of images does not change during the loop: compute it once\n",
        "    num_images = len(imid_to_path)\n",
        "    bar = display(progress_bar(1, num_images), display_id=True)\n",
        "    # list to store the results in coco format\n",
        "    pred_results = []\n",
        "    # iterating over each image\n",
        "    for index, (im_id, path) in enumerate(imid_to_path.items()):\n",
        "        # convert input image to pytorch tensor (the model takes a list of image tensors)\n",
        "        img = [F.to_tensor(Image.open(os.path.join(DATASET_ROOT, path)).convert('RGB')).to(DEVICE)]\n",
        "        # forward propagation through the model; a single image goes in, so only element 0 is read\n",
        "        prediction = model(img)[0]\n",
        "        # get the bounding box, keypoints, and scores from the prediction results\n",
        "        boxes = prediction[\"boxes\"].cpu().numpy()\n",
        "        keypoints = prediction[\"keypoints\"].cpu().numpy()\n",
        "        scores = prediction[\"scores\"].cpu().numpy()\n",
        "\n",
        "        # update the results in coco format\n",
        "        for box, kps, sc in zip(boxes, keypoints, scores):\n",
        "            # reshape(-1) already yields a flat array, so no additional flatten() copy is needed;\n",
        "            # float() makes every value a plain Python float for json.dump\n",
        "            camma_kps = [float(_a) for _a in coco_to_camma_kps(kps).reshape(-1).tolist()]\n",
        "            coco_kps = [float(_a) for _a in kps.reshape(-1).tolist()]\n",
        "            x1, y1, x2, y2 = (float(b) for b in box)\n",
        "            pred_results.append({\n",
        "                \"image_id\": int(im_id),\n",
        "                # NOTE(review): category_id is written as a string; confirm that the\n",
        "                # evaluation scripts in lib/ expect \"1\" rather than the integer 1\n",
        "                \"category_id\": \"1\",\n",
        "                # corners (x1, y1, x2, y2) -> [x, y, width, height]\n",
        "                \"bbox\": [x1, y1, x2 - x1, y2 - y1],\n",
        "                \"keypoints\": camma_kps,\n",
        "                \"coco_keypoints\": coco_kps,\n",
        "                \"score\": float(sc)\n",
        "            })\n",
        "        bar.update(progress_bar(index + 1, num_images))\n",
        "    return pred_results\n",
        "\n",
        "# run the model over the whole dataset\n",
        "pred_results = evaluate()\n",
        "\n",
        "# dump the detections to disk so that the evaluation scripts below can read them\n",
        "print(\"Writing result in the coco format\\n\")\n",
        "with open(\"keypoint_maskrcnn_pred_results.json\", \"w\") as results_file:\n",
        "    results_file.write(json.dumps(pred_results))\n",
        "\n",
        "separator = \"--------------------------------------------------------------------\"\n",
        "\n",
        "# Evaluation for 2D person bounding box\n",
        "print(separator)\n",
        "print(\"2D person bounding box results for the Keypoint-MaskRCNN model\")\n",
        "!python lib/eval_ap_2dperson.py --gt annotations/camma_mvor_2018.json --dt keypoint_maskrcnn_pred_results.json\n",
        "print(separator + \"\\n\")\n",
        "\n",
        "# Evaluation for 2D person keypoints\n",
        "print(separator)\n",
        "print(\"2D person keypoints results for the Keypoint-MaskRCNN model\\n\")\n",
        "!python lib/eval_pck_2dpose.py --gt annotations/camma_mvor_2018.json --dt keypoint_maskrcnn_pred_results.json\n",
        "print(separator)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "XiTiYlP4DRmD",
        "colab_type": "text"
      },
      "source": [
        "## 2D pose estimation evaluation results\n",
        "Here, we provide results for 2D keypoints from the json files obtained by running the 2D keypoint detectors on the MVOR dataset"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "iTsYYyplDRmD",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# (heading, detections file) for each 2D keypoint detector, in display order:\n",
        "# openpose, openpose with multiscale testing, alphapose, rtpose\n",
        "pose_results = [\n",
        "    (\"Evaluation results for the OpenPose\", \"detections_results/openpose_kps.json\"),\n",
        "    (\"Evaluation results for the OpenPose with multi-scale testing\", \"detections_results/openpose_kps_multiscale.json\"),\n",
        "    (\"Evaluation results for the AlphaPose\", \"detections_results/alphapose_kps.json\"),\n",
        "    (\"Evaluation results for the RTPose\", \"detections_results/rtpose_kps.json\"),\n",
        "]\n",
        "\n",
        "# run the 2D pose evaluation script on every detections file against the ground truth;\n",
        "# IPython expands {dt_path} before handing the command to the shell\n",
        "for heading, dt_path in pose_results:\n",
        "    print(heading)\n",
        "    !python lib/eval_pck_2dpose.py --gt annotations/camma_mvor_2018.json --dt {dt_path}"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "whUZ6RaPDRmF",
        "colab_type": "text"
      },
      "source": [
        "## 2D person bounding box detection evaluation results\n",
        "Here, we provide results for 2D person bounding box detection from the json files obtained by running the 2D person bounding box detectors on the MVOR dataset"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "2K8viaBxDRmG",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# (detector name, detections file) for each 2D person detector, in display order:\n",
        "# faster rcnn, deformable conv-nets R-FCN\n",
        "bbox_results = [\n",
        "    (\"Faster-RCNN\", \"detections_results/faster_rcnn_bbox.json\"),\n",
        "    (\"deformable conv-nets R-FCN\", \"detections_results/dfcnet_rfcn_bbox.json\"),\n",
        "]\n",
        "separator = \"--------------------------------------------------------------------\"\n",
        "\n",
        "# run the bounding box evaluation script on every detections file against the ground truth;\n",
        "# IPython expands {dt_path} before handing the command to the shell\n",
        "for detector, dt_path in bbox_results:\n",
        "    print(\"\\n\" + separator)\n",
        "    print(\"Evaluation results for the \" + detector)\n",
        "    print(separator)\n",
        "    # the interpreter is invoked as `python`, like in every other evaluation cell of this notebook\n",
        "    !python lib/eval_ap_2dperson.py --gt annotations/camma_mvor_2018.json --dt {dt_path}"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "0I7Hd86aDRmH",
        "colab_type": "text"
      },
      "source": [
        "2D person bounding box detection from keypoints (here, the AR is not averaged over IoU in [0.50, 0.55, ..., 0.95], it is only given for IoU = 0.50)"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "zeflvZGHDRmI",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# (detector name, detections file) for the boxes derived from each keypoint detector,\n",
        "# in display order: openpose default, openpose multiscale, alphapose default, rtpose\n",
        "kps_bbox_results = [\n",
        "    (\"OpenPose\", \"detections_results/openpose_bbox.json\"),\n",
        "    (\"OpenPose multi-scale\", \"detections_results/openpose_bbox_multiscale.json\"),\n",
        "    (\"AlphaPose\", \"detections_results/alphapose_bbox.json\"),\n",
        "    (\"RTPose\", \"detections_results/rtpose_bbox.json\"),\n",
        "]\n",
        "separator = \"--------------------------------------------------------------------\"\n",
        "\n",
        "# run the bounding box evaluation script on every detections file against the ground truth;\n",
        "# IPython expands {dt_path} before handing the command to the shell\n",
        "for detector, dt_path in kps_bbox_results:\n",
        "    print(\"\\n\" + separator)\n",
        "    print(\"Evaluation results for the \" + detector)\n",
        "    print(separator)\n",
        "    !python lib/eval_ap_2dperson.py --gt annotations/camma_mvor_2018.json --dt {dt_path}"
      ],
      "execution_count": null,
      "outputs": []
    }
  ]
}
